Goals

Goal 1: Determine how the different trimming parameters affect the methylation stats notebook link for parameters

Goal 2: Determine if any methylation stats correlate with: DNA extraction date, Lib prep date, life stage or treatments

Goal 3: Generate methylation statistics

Goal 1: Determine how the different trimming parameters affect the methylation stats

# Read in required libraries
library("reshape")
#library(plyr)
library("dplyr")
library("tidyverse")
library("Rmisc")
library(gridExtra)
library(ggpubr)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(lme4)
library(lmerTest)
library(car)
library(effects)
library(ggfortify)
library(cowplot)
library(vegan)
library(corrr)
library(ggcorrplot)
library(GGally)
library(broom)
library(cowplot)
library(arsenal)
library(patchwork)
library(tidyr)
library(ggrepel)

# load data
data <- read.csv("../data/WGBS/Thermal_Transplant_Methylseq_Stats.csv")

data2 <- data

# Removing characters in columns and turning the values numeric
data2$percent_Aligned <- as.numeric(sub("%","",data2$percent_Aligned))
data2$percent_Dups <- as.numeric(sub("%","",data2$percent_Dups))
data2$percent_Aligned <- as.numeric(sub("%","",data2$percent_Aligned))
data2$percent_mCHG <- as.numeric(sub("%","",data2$percent_mCHG))
data2$percent_mCHH <- as.numeric(sub("%","",data2$percent_mCHH))
data2$percent_mCpG <- as.numeric(sub("%","",data2$percent_mCpG))
data2$R1_percent_BP_Trimmed <- as.numeric(sub("%","",data2$R1_percent_BP_Trimmed))
data2$R1_percent_Dups <- as.numeric(sub("%","",data2$R1_percent_Dups))
data2$R1_percent_GC <- as.numeric(sub("%","",data2$R1_percent_GC))
data2$R2_percent_BP_Trimmed <- as.numeric(sub("%","",data2$R2_percent_BP_Trimmed))
data2$R2_percent_Dups <- as.numeric(sub("%","",data2$R2_percent_Dups))
data2$R2_percent_GC <- as.numeric(sub("%","",data2$R2_percent_GC))

data2$Mean_cov <- as.numeric(sub("X","",data2$Mean_cov))
data2$Median_cov <- as.numeric(sub("X","",data2$Median_cov))

#removing metadata columns
data3 <- data2[-c(1, 4:9)]

data4 <- melt(data3, id = c("Trimming", "Sample"))

data5 <- data4 %>%
  pivot_wider(names_from = Trimming, values_from = value)

trim_corr_plot <- ggplot(data = data5, aes(x=Original, y=Trim_3))+
  ylab("Trim_3")+ xlab("Original") + 
  geom_point()+
  geom_smooth(method = "lm") +
#  stat_regline_equation(label.y = 2.0, aes(label = ..eq.label..)) +
  stat_regline_equation(label.y.npc = 'bottom', label.x.npc = 'center', aes(label = ..rr.label..)) +
  theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/Trim_corr_plot.jpeg", plot=trim_corr_plot, dpi=300, width=12, height=12, units="in")

Goal 2: Determine if any methylation stats correlate with: DNA extraction date, Lib prep date, life stage or treatments

# Just using Trim_3 data 

data6 <- data2 %>% filter(Trimming == "Trim_3") 
data7 <- data6[-c(1:2, 19:26)]
data8 <- melt(data7, id = c(1:7))

# Turning variable columns into factors 
col <- c("Sample", "Coral.ID", "History", "Life_Stage", "Gel_Result", "Extraction_Date", "PMS_Date")
data8[col] <- lapply(data8[col], factor)

History adult

data8A <- data8 %>% filter(Life_Stage == "Adult")

AHistory_Box <- ggplot(data8A, aes(x=History, y=value)) +
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
  geom_point(pch = 21) +
  xlab("History") +  ylab("Value") + #Axis titles
  theme_bw() + 
  theme(panel.border = element_rect(color="black", fill=NA, size=0.75), 
        panel.grid.major = element_blank(), #Makes background theme white
        panel.grid.minor = element_blank(), 
        axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/AHistory_Box.jpeg", plot=AHistory_Box, dpi=300, width=12, height=12, units="in")

History Larvae

data8L <- data8 %>% filter(Life_Stage == "Larvae")

LHistory_Box <- ggplot(data8L, aes(x=History, y=value)) +
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
  geom_point(pch = 21) +
  xlab("History") +  ylab("Value") + #Axis titles
  theme_bw() + 
  theme(panel.border = element_rect(color="black", fill=NA, size=0.75), 
        panel.grid.major = element_blank(), #Makes background theme white
        panel.grid.minor = element_blank(), 
        axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/LHistory_Box.jpeg", plot=LHistory_Box, dpi=300, width=12, height=12, units="in")

Lifestage

LifeStage_Box <- ggplot(data8, aes(x=Life_Stage, y=value)) +
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
  geom_point(pch = 21) +
  xlab("Life Stage") +  ylab("Value") + #Axis titles
  theme_bw() + 
  theme(panel.border = element_rect(color="black", fill=NA, size=0.75), 
        panel.grid.major = element_blank(), #Makes background theme white
        panel.grid.minor = element_blank(), 
        axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/LifeStage_Box.jpeg", plot=LifeStage_Box, dpi=300, width=12, height=12, units="in")

Gel Result

Gel_Box <- ggplot(data8, aes(x=Gel_Result, y=value)) +
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
  geom_point(pch = 21) +
  xlab("Gel Result") +  ylab("Value") + #Axis titles
  theme_bw() + 
  theme(panel.border = element_rect(color="black", fill=NA, size=0.75), 
        panel.grid.major = element_blank(), #Makes background theme white
        panel.grid.minor = element_blank(), 
        axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/Gel_Box.jpeg", plot=Gel_Box, dpi=300, width=12, height=12, units="in")

Extraction Date

Extraction_Box <- ggplot(data8, aes(x=Extraction_Date, y=value)) +
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
  geom_point(pch = 21) +
  xlab("Extraction Date") +  ylab("Value") + #Axis titles
  theme_bw() + 
  theme(panel.border = element_rect(color="black", fill=NA, size=0.75), 
        panel.grid.major = element_blank(), #Makes background theme white
        panel.grid.minor = element_blank(), 
        axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/Extraction_Box.jpeg", plot=Extraction_Box, dpi=300, width=12, height=12, units="in")

PMS Date

PMS_Box <- ggplot(data8, aes(x=PMS_Date, y=value)) +
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
  geom_point(pch = 21) +
  xlab("PMS Library Prep Date") +  ylab("Value") + #Axis titles
  theme_bw() + 
  theme(panel.border = element_rect(color="black", fill=NA, size=0.75), 
        panel.grid.major = element_blank(), #Makes background theme white
        panel.grid.minor = element_blank(), 
        axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ variable, scales = "free")

ggsave(filename="../output/WGBS/PMS_Box.jpeg", plot=PMS_Box, dpi=300, width=12, height=12, units="in")

Goal 3: Generate Methylation statistics

#Adult
data9A <- data8A %>% 
  select(Sample, History, variable, value) %>%
  pivot_wider(names_from = variable, values_from = value) %>%
  select(-Sample)

tableby_adult <- tableby(History ~., data = data9A)
summary(tableby_adult)
Patch-Ambient-Patch (N=4) Patch-Ambient-Rim (N=4) Patch-Heated-Patch (N=4) Patch-Heated-Rim (N=4) Rim-Ambient-Patch (N=4) Rim-Ambient-Rim (N=4) Rim-Heated-Patch (N=4) Rim-Heated-Rim (N=4) Total (N=32) p value
percent_mCpG 0.098
   Mean (SD) 7.950 (0.173) 7.825 (0.499) 8.150 (0.191) 8.175 (0.585) 7.425 (0.222) 7.600 (0.476) 7.500 (0.616) 7.525 (0.340) 7.769 (0.468)
   Range 7.700 - 8.100 7.400 - 8.500 8.000 - 8.400 7.500 - 8.800 7.200 - 7.700 7.300 - 8.300 6.800 - 8.300 7.100 - 7.800 6.800 - 8.800
percent_mCHG 0.942
   Mean (SD) 0.800 (0.356) 0.825 (0.519) 0.675 (0.206) 0.625 (0.050) 0.675 (0.222) 0.600 (0.141) 0.750 (0.173) 0.775 (0.419) 0.716 (0.275)
   Range 0.500 - 1.300 0.500 - 1.600 0.500 - 0.900 0.600 - 0.700 0.500 - 1.000 0.500 - 0.800 0.600 - 0.900 0.500 - 1.400 0.500 - 1.600
percent_mCHH 0.959
   Mean (SD) 1.450 (0.436) 1.400 (0.678) 1.200 (0.245) 1.150 (0.173) 1.325 (0.519) 1.175 (0.171) 1.325 (0.299) 1.225 (0.544) 1.281 (0.386)
   Range 1.200 - 2.100 0.900 - 2.400 1.000 - 1.500 0.900 - 1.300 1.000 - 2.100 1.000 - 1.400 1.000 - 1.700 0.800 - 2.000 0.800 - 2.400
M_Cs 0.961
   Mean (SD) 340.750 (57.838) 320.075 (149.378) 400.300 (77.135) 312.650 (140.697) 346.075 (48.523) 375.125 (113.891) 357.475 (114.043) 378.950 (168.775) 353.925 (106.250)
   Range 260.700 - 391.400 119.400 - 473.000 333.300 - 509.700 123.000 - 451.300 284.100 - 390.300 282.600 - 540.600 222.700 - 485.700 139.400 - 533.800 119.400 - 540.600
percent_Dups 0.300
   Mean (SD) 15.750 (4.948) 14.250 (2.779) 15.850 (3.328) 11.050 (1.066) 12.875 (1.733) 15.475 (1.660) 12.575 (0.950) 16.150 (6.125) 14.247 (3.432)
   Range 11.200 - 21.100 11.800 - 16.900 11.600 - 18.600 9.900 - 12.400 11.500 - 15.400 13.800 - 17.000 11.500 - 13.600 10.600 - 24.900 9.900 - 24.900
percent_Aligned 0.042
   Mean (SD) 34.675 (1.394) 34.900 (2.255) 37.625 (1.688) 34.300 (2.357) 37.500 (1.472) 33.625 (1.962) 37.100 (2.906) 33.950 (2.293) 35.459 (2.432)
   Range 33.000 - 36.400 31.600 - 36.400 35.300 - 39.100 31.700 - 37.200 35.300 - 38.400 31.200 - 35.500 32.800 - 39.200 30.800 - 35.700 30.800 - 39.200
Ins_size 0.599
   Mean (SD) 96.750 (8.884) 93.500 (7.594) 98.750 (4.573) 103.750 (26.475) 89.000 (8.165) 103.000 (18.511) 90.750 (10.844) 105.500 (12.767) 97.625 (13.585)
   Range 91.000 - 110.000 86.000 - 103.000 93.000 - 104.000 85.000 - 143.000 77.000 - 95.000 88.000 - 128.000 77.000 - 103.000 91.000 - 122.000 77.000 - 143.000
Median_cov 0.850
   Mean (SD) 2.250 (0.500) 1.750 (1.258) 2.750 (0.957) 2.000 (1.414) 1.750 (0.500) 2.000 (0.816) 2.000 (0.816) 1.750 (1.258) 2.031 (0.933)
   Range 2.000 - 3.000 0.000 - 3.000 2.000 - 4.000 0.000 - 3.000 1.000 - 2.000 1.000 - 3.000 1.000 - 3.000 0.000 - 3.000 0.000 - 4.000
Mean_cov 0.973
   Mean (SD) 4.025 (0.665) 3.800 (1.683) 4.700 (1.003) 3.700 (1.817) 4.200 (0.497) 4.275 (1.021) 4.400 (1.476) 4.300 (1.851) 4.175 (1.223)
   Range 3.100 - 4.600 1.500 - 5.400 3.800 - 6.100 1.300 - 5.600 3.600 - 4.700 3.400 - 5.700 2.600 - 6.100 1.600 - 5.800 1.300 - 6.100
table_adult <- as.data.frame(summary(tableby_adult))
write.csv(table_adult, "../output/WGBS/Adult_WGBS_Summarystats.csv")

#Larvae
data9L <- data8L %>% 
  select(Sample, History, variable, value) %>%
  pivot_wider(names_from = variable, values_from = value) %>%
  select(-Sample)

tableby_larvae <- tableby(History ~., data = data9L)
summary(tableby_larvae)
Patch-Ambient-Patch (N=4) Patch-Heated-Patch (N=4) Rim-Ambient-Patch (N=4) Rim-Heated-Patch (N=3) Total (N=15) p value
percent_mCpG 0.066
   Mean (SD) 7.425 (0.675) 8.400 (0.258) 7.400 (0.408) 7.600 (0.693) 7.713 (0.637)
   Range 6.800 - 8.300 8.100 - 8.700 6.800 - 7.700 7.200 - 8.400 6.800 - 8.700
percent_mCHG 0.830
   Mean (SD) 0.775 (0.350) 0.650 (0.173) 0.675 (0.150) 0.633 (0.153) 0.687 (0.210)
   Range 0.600 - 1.300 0.500 - 0.800 0.600 - 0.900 0.500 - 0.800 0.500 - 1.300
percent_mCHH 0.117
   Mean (SD) 1.250 (0.379) 0.850 (0.173) 1.175 (0.150) 0.967 (0.058) 1.067 (0.266)
   Range 1.000 - 1.800 0.700 - 1.000 1.000 - 1.300 0.900 - 1.000 0.700 - 1.800
M_Cs 0.196
   Mean (SD) 325.975 (100.575) 448.725 (115.826) 396.900 (120.304) 498.567 (27.567) 412.140 (111.458)
   Range 194.400 - 439.400 311.900 - 594.300 300.200 - 561.800 478.500 - 530.000 194.400 - 594.300
percent_Dups 0.342
   Mean (SD) 13.200 (2.288) 16.175 (5.121) 12.275 (2.497) 15.333 (0.321) 14.173 (3.290)
   Range 11.000 - 16.100 11.100 - 23.300 10.800 - 16.000 15.100 - 15.700 10.800 - 23.300
percent_Aligned 0.511
   Mean (SD) 41.000 (2.094) 40.450 (1.112) 39.100 (1.988) 40.200 (1.670) 40.187 (1.731)
   Range 39.500 - 44.100 38.900 - 41.500 36.300 - 41.000 38.700 - 42.000 36.300 - 44.100
Ins_size 0.004
   Mean (SD) 80.250 (3.862) 107.500 (6.403) 88.750 (10.243) 96.000 (10.536) 92.933 (12.803)
   Range 76.000 - 84.000 103.000 - 117.000 76.000 - 98.000 86.000 - 107.000 76.000 - 117.000
Median_cov 0.028
   Mean (SD) 1.750 (0.500) 3.000 (0.816) 2.250 (0.500) 3.000 (0.000) 2.467 (0.743)
   Range 1.000 - 2.000 2.000 - 4.000 2.000 - 3.000 3.000 - 3.000 1.000 - 4.000
Mean_cov 0.331
   Mean (SD) 4.175 (1.276) 5.050 (1.190) 4.825 (1.302) 5.900 (0.608) 4.927 (1.200)
   Range 2.500 - 5.600 3.600 - 6.500 3.800 - 6.600 5.500 - 6.600 2.500 - 6.600
table_Larvae <- as.data.frame(summary(tableby_larvae))
write.csv(table_Larvae, "../output/WGBS/Larvae_WGBS_Summarystats.csv")